R Markdown

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   1.0.0 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.5.0 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(lubridate)
## Loading required package: timechange
## 
## Attaching package: 'lubridate'
## 
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(leaflet)

Including Plots

You can also embed plots, for example:

df %>%
  group_by(speed_limit)%>%
  summarise(proportion_severe = mean(accident_severity))
## # A tibble: 6 × 2
##   speed_limit proportion_severe
##         <int>             <dbl>
## 1          20             0.157
## 2          30             0.197
## 3          40             0.236
## 4          50             0.248
## 5          60             0.327
## 6          70             0.246
date_df <-
  df%>%
  group_by(date)%>%
  summarise(proportion_severe = mean(accident_severity))

ggplot(date_df, aes(x=date, y=proportion_severe ))+
  geom_point()+
  geom_smooth()+
  annotate('rect',xmin=as.Date("2020-03-23"),xmax=as.Date("2020-06-15"),ymin=0.1,ymax=Inf, fill='red', alpha=0.25) #Start of lockdown
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

 # geom_vline(xintercept=as.Date("2020-06-15")) #Non Essential shops reopen
#No immediately clear winter effect by quarter/month
time_df<-
  df%>%
  mutate(hour = hour(time))%>%
  mutate(month = month(date))%>%
  group_by(hour, month)%>%
  summarise(accident_proportion = mean(accident_severity))
## `summarise()` has grouped output by 'hour'. You can override using the
## `.groups` argument.
ggplot(time_df, aes(x=hour, y=accident_proportion))+
  geom_point()+
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

#No immediately clear winter effect by quarter/month
time_df<-
  df%>%
  mutate(hour = hour(time))%>%
  mutate(day =week(date))%>%
  group_by(hour, day)%>%
  summarise(accident_proportion = mean(accident_severity))
## `summarise()` has grouped output by 'hour'. You can override using the
## `.groups` argument.
ggplot(time_df, aes(x=hour, y=accident_proportion, group=day))+
  geom_line()

lsoa_df <-
  read.csv('./data/LSOA_(2011)_to_LSOA_(2021)_to_Local_Authority_District_(2022)_Lookup_for_England_and_Wales.csv')


lad_prop_df<-
  df %>%
  merge(.,lsoa_df, by.x="lsoa_of_accident_location", by.y="LSOA21CD")%>%
  group_by(LAD22CD)%>%
  summarise(accident_proportion = mean(accident_severity))


uk_regions <- geojsonio::geojson_read("https://martinjc.github.io/UK-GeoJSON/json/eng/topo_lad.json", what = "sp")
## Registered S3 method overwritten by 'geojsonsf':
##   method        from   
##   print.geojson geojson
uk_regions <- uk_regions[uk_regions$id %in% lad_prop_df$LAD22CD, ]

bins <-c(0,0.1,0.2,0.3,0.4,0.5,0.6)
pal <- colorBin("Reds", domain = lad_prop_df$accident_proportion, bins=bins )

leaflet(uk_regions)%>%
  addTiles('https://server.arcgisonline.com/ArcGIS/rest/services/Canvas/World_Light_Gray_Base/MapServer/tile/{z}/{y}/{x}') %>%
  addPolygons(stroke = FALSE, smoothFactor = 0.3,opacity = 1,fillOpacity = 0.7,dashArray = "3",
    fillColor = ~pal(lad_prop_df$accident_proportion),)%>%
  addLegend(pal = pal, values = lad_prop_df$accident_proportion, opacity = 0.7, title = NULL,
  position = "bottomright")
mdl <- glm(accident_severity ~ speed_limit +night+weekend+season,data = df, family = binomial())

summary(mdl)
## 
## Call:
## glm(formula = accident_severity ~ speed_limit + night + weekend + 
##     season, family = binomial(), data = df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.0845  -0.6904  -0.6503  -0.6025   1.9368  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -2.0452614  0.0264636 -77.286  < 2e-16 ***
## speed_limit   0.0167986  0.0005489  30.605  < 2e-16 ***
## night         0.3565591  0.0290797  12.261  < 2e-16 ***
## weekend       0.0880066  0.0288110   3.055  0.00225 ** 
## seasonspring  0.2024068  0.0246309   8.218  < 2e-16 ***
## seasonsummer  0.1406371  0.0220018   6.392 1.64e-10 ***
## seasonautumn  0.0949846  0.0217818   4.361 1.30e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 95283  on 91186  degrees of freedom
## Residual deviance: 94127  on 91180  degrees of freedom
## AIC: 94141
## 
## Number of Fisher Scoring iterations: 4
speeds <- seq(0,100,1)
n_samples <- length(speeds)

newdata = list(
  speed_limit=speeds, 
  night=rep(0,n_samples), 
  weekend=rep(0,n_samples),
  season = rep('winter',n_samples))

predicted_vals <- predict(mdl,type = "response", newdata= newdata, se.fit=TRUE)
                                                                                    
results <-
  tibble(
    speeds =speeds,
    mu = predicted_vals$fit, 
    upper_ci =  predicted_vals$fit +1.96*predicted_vals$se.fit,
    lower_ci =  predicted_vals$fit -1.96*predicted_vals$se.fit
)

newdata_summer = list(
  speed_limit=speeds, 
  night=rep(0,n_samples), 
  weekend=rep(0,n_samples),
  season = rep('summer',n_samples))

predicted_vals <- predict(mdl,type = "response", newdata= newdata_summer, se.fit=TRUE)
                                                                                    
results_summer <-
  tibble(
    speeds =speeds,
    mu = predicted_vals$fit, 
    upper_ci =  predicted_vals$fit +1.96*predicted_vals$se.fit,
    lower_ci =  predicted_vals$fit -1.96*predicted_vals$se.fit
)

ggplot(data=results, aes(x=speeds, y=mu))+
  geom_line()+
  geom_line(data=results_summer)+
  geom_ribbon(data=results_summer,aes(ymin=lower_ci, ymax=upper_ci), fill='red', alpha=0.5)+
  geom_ribbon(aes(ymin=lower_ci, ymax=upper_ci), fill='red', alpha=0.5)

#plot( seq(-0,70,1),predict(mdl,type = "response", newdata = list(speed_limit=seq(-0,70,1), night=rep(0,length(seq(0,70,1))))))
vehicle_df <-
  read_csv('./data/dft-road-casualty-statistics-vehicle-2020.csv.xls')
## Rows: 167375 Columns: 27
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): accident_index, accident_reference, generic_make_model
## dbl (24): accident_year, vehicle_reference, vehicle_type, towing_and_articul...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
vehicle_df <-
  merge(vehicle_df,df, by="accident_index")

sun_in_eyes <-
  vehicle_df%>%
  filter(vehicle_direction_from == 7|vehicle_direction_from == 3)%>%
  mutate(month=month(date))%>%
  mutate(hour=hour(time))%>%
  filter(hour>6)%>%
  group_by(month, hour)%>%
  summarise(accident_proportion = mean(accident_severity))
## `summarise()` has grouped output by 'month'. You can override using the
## `.groups` argument.
ggplot(sun_in_eyes, aes(hour, accident_proportion, color=month))+
  geom_point()

df <-
  read_csv('./data/dft-road-casualty-statistics-casualty-2020.csv.xls')
## Rows: 115584 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): accident_index, accident_reference
## dbl (16): accident_year, vehicle_reference, casualty_reference, casualty_cla...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.